from langchain.text_splitter import CharacterTextSplitter


# Read the whole sample document into memory as one string, decoding it as
# UTF-8. The path is absolute and machine-specific.
source_path = r"F:\ai\03大模型开发实战\05langchain深度解析\LangChain全面剖析之Retrieval资料\data\langchain.txt"
with open(source_path, mode="r", encoding="utf-8") as source_file:
    langchain_desc = source_file.read()

# Variant 1: build the splitter by naming the tiktoken encoding directly.
# NOTE(review): with from_tiktoken_encoder, chunk_size / chunk_overlap are
# presumably counted in tokens rather than characters — confirm against the
# LangChain text-splitter documentation.
encoding_splitter_options = {
    "encoding_name": "cl100k_base",
    "chunk_size": 60,
    "chunk_overlap": 0,
}
text_split = CharacterTextSplitter.from_tiktoken_encoder(**encoding_splitter_options)

# Split the loaded document and show the resulting list of chunks.
texts = text_split.split_text(langchain_desc)
print(texts)

# Variant 2: build the splitter by naming a model ("gpt-4") instead of an
# encoding; the same chunk_size and chunk_overlap values are used.
# NOTE(review): the encoding is presumably looked up from the model name by
# the library — confirm against the LangChain / tiktoken documentation.
model_splitter_options = {
    "model_name": "gpt-4",
    "chunk_size": 60,
    "chunk_overlap": 0,
}
text_split1 = CharacterTextSplitter.from_tiktoken_encoder(**model_splitter_options)

# Split the same document with this splitter and show the resulting chunks.
text1 = text_split1.split_text(langchain_desc)
print(text1)